{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## re.match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<_sre.SRE_Match object; span=(0, 41), match='Hello 123 4567 World_This is a Regex Demo'>\n",
      "Hello 123 4567 World_This is a Regex Demo\n",
      "(0, 41)\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "content = 'Hello 123 4567 World_This is a Regex Demo'\n",
    "result = re.match('^Hello\\s\\d{3}\\s\\d{4}\\s\\w.*Demo$', content)\n",
    "print(result)\n",
    "print(result.group())\n",
    "print(result.span())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 泛目标"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<_sre.SRE_Match object; span=(0, 41), match='Hello 123 4567 World_This is a Regex Demo'>\n",
      "Hello 123 4567 World_This is a Regex Demo\n",
      "(0, 41)\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "content = 'Hello 123 4567 World_This is a Regex Demo'\n",
    "result = re.match('^Hello.*Demo$', content)\n",
    "print(result)\n",
    "print(result.group())\n",
    "print(result.span())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  匹配目标"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "content = 'Hello 123 4567 World_This is a Regex Demo'\n",
    "result = re.match('', content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 匹配模式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<_sre.SRE_Match object; span=(0, 40), match='Hello 1234567 World_This\\nis a Regex Demo'>\n",
      "1234567\n",
      "(0, 40)\n"
     ]
    }
   ],
   "source": [
    "content = '''Hello 1234567 World_This\n",
    "is a Regex Demo'''\n",
    "result = re.match('^He.*?(\\d+).*Demo$', content, re.S)\n",
    "print(result)\n",
    "print(result.group(1))\n",
    "print(result.span())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## re.findall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('/1.html', 'aaa'), ('/2.html', 'bbb'), ('/3.html', 'ccc')]\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "html = '''<div>\n",
    "<ul>\n",
    "    <li><a href=\"/1.html\">aaa</a></li>\n",
    "    <li><a href=\"/2.html\">bbb</a></li>\n",
    "    <li>\n",
    "        <a href=\"/3.html\">ccc</a>\n",
    "    </li>\n",
    "</ul>\n",
    "</div>'''\n",
    "result = re.findall('<li.*?href=\"(.*?)\".*?>(.*?)</a>', html, re.S)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 实战练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hello\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import re\n",
    "print('hello')\n",
    "headers = {\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'\n",
    "}\n",
    "content = requests.get('https://book.douban.com/', headers=headers).text\n",
    "content = search('')\n",
    "pattern = re.compile('<li.*?cover.*?href=\"(.*?)\".*?title=\"(.*?)\".*?more-meta.*?author\">(.*?)</span>.*?year\">(.*?)</span>.*?</li>', re.S)\n",
    "result = re.findall(pattern, content)\n",
    "print(content)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
